ast<-read_delim("Ecoli_AST.tsv.gz") %>% rename(BioSample=`#BioSample`)
length(unique(ast$BioSample))
## [1] 9094
ast %>% group_by(BioSample) %>% summarise(n=length(unique((Antibiotic))))%>% pull(n) %>% summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 14.00 15.00 15.33 18.00 36.00
afp <-read_tsv("ATB_Ecoli_AFP.tsv.gz")
gene_counts <- afp %>% group_by(`Gene symbol`, Class, Subclass) %>% count()
species_calls <- read_tsv("species_calls.tsv.gz")
ecoli <- species_calls %>% filter(Species=="Escherichia coli") %>% pull(Sample)
afp_status <- read_tsv("AMRFP_status.tsv.gz") %>% filter(sample %in% ecoli)
denominator <- afp_status %>% filter(status=="PASS") %>% nrow()
# afp results including null row for each genome that ran but returned no hits
afp_all <- afp_status %>% rename(Name=sample) %>% left_join(afp) %>% filter(status=="PASS")
ec_types <- read_tsv(file="ec_types_enterobase.tsv") %>% rename(Name=sample_accession)
sum(unique(ast$BioSample) %in% afp$Name)
## [1] 6768
# AST data for strains with AMRfinder run successfully
ast_matched <- ast %>% filter(BioSample %in% afp_status$sample[afp_status$status=="PASS"])
# AMRfinder results, for strains with matching AST data from NCBI
afp_matched <- afp %>% filter(Name %in% ast$BioSample)
# check objects
dim(ast_matched)
## [1] 107750 17
ast_matched %>% pull(BioSample) %>% unique() %>% length()
## [1] 6768
dim(afp_matched)
## [1] 184325 7
afp_matched %>% pull(Name) %>% unique() %>% length()
## [1] 6768
# number with pmrB_Y358N
(gene_counts %>% filter(`Gene symbol`=="pmrB_Y358N") %>% pull(n))/denominator
## [1] 0.4652073
pmrB_Y358N <- summarise_marker_dist_ec_types(afp_all=afp_all, ec_types=ec_types, marker="pmrB_Y358N")
pmrB_Y358N$clermont
## # A tibble: 16 × 4
## `Clermont Type (ClermonTyping)` freq n denom
## <chr> <dbl> <dbl> <int>
## 1 - 0.333 2 6
## 2 A 0.295 14713 49862
## 3 B1 0.976 53783 55130
## 4 B2 0.000386 14 36252
## 5 C 0.990 5496 5552
## 6 D 0.0301 458 15207
## 7 E 0.898 19666 21893
## 8 E or cladeI 0.341 56 164
## 9 F 0.00887 37 4171
## 10 G 0.100 331 3307
## 11 ND 0.418 220 526
## 12 Non Escherichia 0 0 6
## 13 Unknown 0.0233 336 14446
## 14 albertii 0.25 1 4
## 15 cladeI 0 0 653
## 16 fergusonii 1 1 1
pmrB_Y358N$pathovar
## # A tibble: 19 × 4
## Pathovar freq n denom
## <chr> <dbl> <dbl> <int>
## 1 - 0.270 32596 120659
## 2 E. albertii 0.25 1 4
## 3 E. coli - EHEC 0.958 35843 37415
## 4 E. coli - EIEC 0.480 240 500
## 5 E. coli - EIEC/EHEC 1 1 1
## 6 E. coli - EIEC/EPEC 0 0 1
## 7 E. coli - EPEC 0.527 3351 6362
## 8 E. coli - ERROR 1 2 2
## 9 E. coli - ETEC 0.440 1439 3269
## 10 E. coli - ETEC/EHEC 0.875 126 144
## 11 E. coli - ETEC/EPEC 0.935 43 46
## 12 E. coli - ETEC/STEC 0.326 238 730
## 13 E. coli - STEC 0.810 9681 11954
## 14 ND 0.418 220 526
## 15 Shigella 1 2 2
## 16 Shigella boydii 0.559 457 818
## 17 Shigella dysenteriae 0.981 916 934
## 18 Shigella flexneri 0.999 9953 9964
## 19 Shigella sonnei 0.000361 5 13849
pmrB_Y358N$clermont_plot
pmrB_Y358N$pathovar_plot
# number with glpT_E448K
(gene_counts %>% filter(`Gene symbol`=="glpT_E448K") %>% pull(n))/denominator
## [1] 0.8692433
glpT_E448K <- summarise_marker_dist_ec_types(afp_all=afp_all, ec_types=ec_types, marker="glpT_E448K")
glpT_E448K$clermont
## # A tibble: 16 × 4
## `Clermont Type (ClermonTyping)` freq n denom
## <chr> <dbl> <dbl> <int>
## 1 - 1 6 6
## 2 A 0.572 28543 49862
## 3 B1 0.997 54952 55130
## 4 B2 0.948 34361 36252
## 5 C 0.981 5448 5552
## 6 D 0.996 15151 15207
## 7 E 0.997 21826 21893
## 8 E or cladeI 0.780 128 164
## 9 F 0.997 4159 4171
## 10 G 0.997 3297 3307
## 11 ND 0.914 481 526
## 12 Non Escherichia 1 6 6
## 13 Unknown 0.987 14258 14446
## 14 albertii 1 4 4
## 15 cladeI 0.998 652 653
## 16 fergusonii 1 1 1
glpT_E448K$pathovar
## # A tibble: 19 × 4
## Pathovar freq n denom
## <chr> <dbl> <dbl> <int>
## 1 - 0.831 100296 120659
## 2 E. albertii 1 4 4
## 3 E. coli - EHEC 0.988 36980 37415
## 4 E. coli - EIEC 0.564 282 500
## 5 E. coli - EIEC/EHEC 1 1 1
## 6 E. coli - EIEC/EPEC 1 1 1
## 7 E. coli - EPEC 0.851 5411 6362
## 8 E. coli - ERROR 1 2 2
## 9 E. coli - ETEC 0.728 2381 3269
## 10 E. coli - ETEC/EHEC 0.868 125 144
## 11 E. coli - ETEC/EPEC 0.935 43 46
## 12 E. coli - ETEC/STEC 0.789 576 730
## 13 E. coli - STEC 0.939 11229 11954
## 14 ND 0.914 481 526
## 15 Shigella 1 2 2
## 16 Shigella boydii 0.996 815 818
## 17 Shigella dysenteriae 1 934 934
## 18 Shigella flexneri 0.999 9959 9964
## 19 Shigella sonnei 0.993 13751 13849
glpT_E448K$clermont_plot
glpT_E448K$pathovar_plot
# blaEC from full data, so we get accessions
blaEC <- read_tsv("bla_EC_hits.tsv")
blaEC_summary <- summarise_marker_dist_ec_types(afp_all=afp_all, ec_types=ec_types, marker="blaEC")
blaEC_summary$clermont_plot
blaEC_summary$pathovar_plot
# boxplot of identity of hits, stratified by accession of closest ref seq
blaEC %>% right_join(ec_types %>% filter(Name %in%unique(afp_all$Name))) %>%
mutate(ec_label=paste(`Gene symbol`, `Accession of closest sequence`)) %>%
ggplot(aes(x=ec_label, y=`% Identity to reference sequence`)) + geom_boxplot() + coord_flip()
# distribution of alleles per clade
blaEC_alleles_clade <- blaEC %>% right_join(ec_types %>% filter(Name %in%unique(afp_all$Name))) %>%
ggplot(aes(x=`Clermont Type (ClermonTyping)`, fill=`Accession of closest sequence`)) + geom_bar() + coord_flip()
# distribution of alleles per pathovar
blaEC_alleles_pathovar <- blaEC %>% right_join(ec_types %>% filter(Name %in%unique(afp_all$Name))) %>%
ggplot(aes(x=Pathovar, fill=`Accession of closest sequence`)) + geom_bar() + coord_flip()
blaEC_alleles_clade + blaEC_alleles_pathovar + plot_layout(guides="collect")
(gene_counts %>% filter(`Gene symbol`=="blaEC") %>% pull(n))/denominator
## [1] 0.9647534
cef_cephalosporin <- solo_marker_atb(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "ceftriaxone", refgene_subclass = "CEPHALOSPORIN")
cef_cephalosporin$solo_stats
## # A tibble: 60 × 8
## `Gene symbol` total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 ampC_C-11T 2 resistant 0 0 0 0 0
## 2 ampC_C-42T 29 resistant 1 0.0345 0.0339 0 0.101
## 3 ampC_T-32A 49 resistant 3 0.0612 0.0342 0 0.128
## 4 blaCMY 7 resistant 4 0.571 0.187 0.205 0.938
## 5 blaCMY-145 1 resistant 1 1 0 1 1
## 6 blaCMY-17 1 resistant 1 1 0 1 1
## 7 blaCMY-2 120 resistant 118 0.983 0.0117 0.960 1
## 8 blaCMY-4 4 resistant 3 0.75 0.217 0.326 1
## 9 blaCMY-42 3 resistant 3 1 0 1 1
## 10 blaCMY-6 1 resistant 1 1 0 1 1
## # ℹ 50 more rows
cef_cephalosporin$combined_plot
cef_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "ceftriaxone", refgene_subclass = "CEPHALOSPORIN")
cef_model <- logistf(resistant ~ ., data=cef_binary)
cef_logreg <- logreg_details(cef_model) %>%
mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
filter(marker != "(Intercept)") %>%
mutate(marker=gsub("`","",marker)) %>%
ggplot(aes(y=marker, col=sig)) +
geom_vline(xintercept=0) +
geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) +
geom_point(aes(x=est)) +
scale_color_manual(values=c("grey", "black")) +
theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")
cef_logreg
fos_fosfomycin <- solo_marker_atb(ast_matched = ast_matched, antibiotic="fosfomycin", refgene_subclass="FOSFOMYCIN", afp_matched=afp_matched)
fos_fosfomycin$solo_stats
## # A tibble: 6 × 8
## `Gene symbol` total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 glpT_E448K 35 resistant 0 0 0 0 0
## 2 uhpT_E350Q 2 resistant 0 0 0 0 0
## 3 <NA> 35 resistant 1 0.0286 0.0282 0 0.0838
## 4 glpT_E448K 35 nonsusceptible 0 0 0 0 0
## 5 uhpT_E350Q 2 nonsusceptible 0 0 0 0 0
## 6 <NA> 35 nonsusceptible 1 0.0286 0.0282 0 0.0838
fos_fosfomycin$combined_plot
fos_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "fosfomycin", refgene_subclass = "FOSFOMYCIN")
fos_model <- logistf(resistant ~ ., data=fos_binary)
fos_logreg <- logreg_details(fos_model) %>%
mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
filter(marker != "(Intercept)") %>%
mutate(marker=gsub("`","",marker)) %>%
ggplot(aes(y=marker, col=sig)) +
geom_vline(xintercept=0) +
geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) +
geom_point(aes(x=est)) +
scale_color_manual(values=c("grey", "black")) +
theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")
fos_logreg
colistin <- solo_marker_atb(ast_matched = ast_matched, antibiotic="colistin", refgene_subclass="COLISTIN", afp_matched=afp_matched)
colistin$solo_stats
## # A tibble: 10 × 8
## `Gene symbol` total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 mcr-1.1 1 resistant 0 0 0 0 0
## 2 mcr-2.1 0 resistant 0 NaN NaN NaN NaN
## 3 pmrB_E123D 51 resistant 0 0 0 0 0
## 4 pmrB_Y358N 6 resistant 0 0 0 0 0
## 5 <NA> 48 resistant 7 0.146 0.0509 0.0460 0.246
## 6 mcr-1.1 1 nonsusceptible 1 1 0 1 1
## 7 mcr-2.1 0 nonsusceptible 0 NaN NaN NaN NaN
## 8 pmrB_E123D 51 nonsusceptible 49 0.961 0.0272 0.908 1
## 9 pmrB_Y358N 6 nonsusceptible 4 0.667 0.192 0.289 1
## 10 <NA> 48 nonsusceptible 46 0.958 0.0288 0.902 1
colistin$combined_plot
col_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "colistin", refgene_subclass = "COLISTIN")
col_model <- logistf(resistant ~ ., data=col_binary)
col_logreg <- logreg_details(col_model) %>%
mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
filter(marker != "(Intercept)") %>%
mutate(marker=gsub("`","",marker)) %>%
ggplot(aes(y=marker, col=sig)) +
geom_vline(xintercept=0) +
geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) +
geom_point(aes(x=est)) +
scale_color_manual(values=c("grey", "black")) +
theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")
col_logreg
gentamicin_agly <- solo_marker_atb(ast_matched = ast_matched, antibiotic="gentamicin", refgene_class="AMINOGLYCOSIDE", afp_matched=afp_matched)
gentamicin_agly$solo_stats
## # A tibble: 34 × 8
## `Gene symbol` total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 aac(2')-IIa 1 resistant 0 0 0 0 0
## 2 aac(3)-IId 39 resistant 39 1 0 1 1
## 3 aac(3)-IIe 35 resistant 34 0.971 0.0282 0.916 1
## 4 aac(3)-VIa 2 resistant 0 0 0 0 0
## 5 aac(6')-Ib 1 resistant 0 0 0 0 0
## 6 aac(6')-Ib4 6 resistant 0 0 0 0 0
## 7 aadA1 131 resistant 2 0.0153 0.0107 0 0.0363
## 8 aadA16 1 resistant 0 0 0 0 0
## 9 aadA2 32 resistant 0 0 0 0 0
## 10 aadA22 4 resistant 0 0 0 0 0
## # ℹ 24 more rows
gentamicin_agly$combined_plot
gent_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "gentamicin", refgene_class = "AMINOGLYCOSIDE")
gent_model_glm <- glm(resistant ~., family=binomial(link="logit"), data=gent_binary)
gent_logreg_glm <- summary(gent_model_glm)$coef %>%
as_tibble(rownames="marker") %>%
rename(est=Estimate, pval=`Pr(>|z|)`) %>%
select(marker, est, pval) %>%
mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
filter(marker != "(Intercept)") %>%
mutate(marker=gsub("`","",marker)) %>%
ggplot(aes(y=marker, col=sig)) +
geom_vline(xintercept=0) +
geom_point(aes(x=est)) +
scale_color_manual(values=c("grey", "black")) +
theme_light() + labs(x="Logist regression coefficient", col="p<0.05")
gent_logreg_glm
gent_model <- logistf(resistant ~ ., data=gent_binary[,colSums(gent_binary)>=20])
gent_logistf <- logreg_details(gent_model) %>%
mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
filter(marker != "(Intercept)") %>%
mutate(marker=gsub("`","",marker)) %>%
ggplot(aes(y=marker, col=sig)) +
geom_vline(xintercept=0) +
geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) +
geom_point(aes(x=est)) +
scale_color_manual(values=c("grey", "black")) +
theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")
gent_logistf
amikacin_agly <- solo_marker_atb(ast_matched = ast_matched, antibiotic="amikacin", refgene_class="AMINOGLYCOSIDE", afp_matched=afp_matched)
amikacin_agly$solo_stats
## # A tibble: 30 × 8
## `Gene symbol` total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 aac(2')-IIa 1 resistant 0 0 0 0 0
## 2 aac(3)-IId 25 resistant 0 0 0 0 0
## 3 aac(3)-IIe 35 resistant 1 0.0286 0.0282 0 0.0838
## 4 aac(6')-Ib 1 resistant 1 1 0 1 1
## 5 aac(6')-Ib4 5 resistant 0 0 0 0 0
## 6 aadA1 75 resistant 1 0.0133 0.0132 0 0.0393
## 7 aadA16 1 resistant 0 0 0 0 0
## 8 aadA2 21 resistant 0 0 0 0 0
## 9 aadA22 3 resistant 0 0 0 0 0
## 10 aadA5 74 resistant 3 0.0405 0.0229 0 0.0855
## # ℹ 20 more rows
amikacin_agly$combined_plot
amk_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "amikacin", refgene_class = "AMINOGLYCOSIDE")
amk_model <- logistf(resistant ~ ., data=amk_binary)
amk_logreg <- logreg_details(amk_model) %>%
mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
filter(marker != "(Intercept)") %>%
mutate(marker=gsub("`","",marker)) %>%
ggplot(aes(y=marker, col=sig)) +
geom_vline(xintercept=0) +
geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) +
geom_point(aes(x=est)) +
scale_color_manual(values=c("grey", "black")) +
theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")
amk_logreg
tobramycin_agly <- solo_marker_atb(ast_matched = ast_matched, antibiotic="tobramycin", refgene_class="AMINOGLYCOSIDE", afp_matched=afp_matched)
tobramycin_agly$solo_stats
## # A tibble: 30 × 8
## `Gene symbol` total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 aac(2')-IIa 1 resistant 0 0 0 0 0
## 2 aac(3)-IId 25 resistant 9 0.36 0.096 0.172 0.548
## 3 aac(3)-IIe 27 resistant 21 0.778 0.0800 0.621 0.935
## 4 aac(6')-Ib 1 resistant 1 1 0 1 1
## 5 aac(6')-Ib4 5 resistant 2 0.4 0.219 0 0.829
## 6 aadA1 75 resistant 0 0 0 0 0
## 7 aadA16 1 resistant 0 0 0 0 0
## 8 aadA2 19 resistant 0 0 0 0 0
## 9 aadA22 3 resistant 0 0 0 0 0
## 10 aadA5 68 resistant 26 0.382 0.0589 0.267 0.498
## # ℹ 20 more rows
tobramycin_agly$combined_plot
tob_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "tobramycin", refgene_class = "AMINOGLYCOSIDE")
tob_model <- logistf(resistant ~ ., data=tob_binary)
tob_logreg <- logreg_details(tob_model) %>%
mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
filter(marker != "(Intercept)") %>%
mutate(marker=gsub("`","",marker)) %>%
ggplot(aes(y=marker, col=sig)) +
geom_vline(xintercept=0) +
geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) +
geom_point(aes(x=est)) +
scale_color_manual(values=c("grey", "black")) +
theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")
tob_logreg
ampicillin <- solo_marker_atb(ast_matched = ast_matched, antibiotic="ampicillin", refgene_class="BETA-LACTAM", afp_matched=afp_matched)
ampicillin$solo_stats
## # A tibble: 6 × 8
## `Gene symbol` total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 blaEC 3083 resistant 63 0.0204 0.00255 0.0154 0.0254
## 2 blaEC-5 246 resistant 9 0.0366 0.0120 0.0131 0.0600
## 3 <NA> 1923 resistant 1872 0.973 0.00366 0.966 0.981
## 4 blaEC 3083 nonsusceptible 68 0.0221 0.00265 0.0169 0.0272
## 5 blaEC-5 246 nonsusceptible 9 0.0366 0.0120 0.0131 0.0600
## 6 <NA> 1923 nonsusceptible 1877 0.976 0.00348 0.969 0.983
ampicillin$combined_plot
amp_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "ampicillin", refgene_class = "BETA-LACTAM")
#amp_model <- logistf(resistant ~ ., data=amp_binary[,colSums(amp_binary)>=20])
amp_model_glm <- glm(resistant ~., family=binomial(link="logit"), data=amp_binary)
amp_logreg_glm <- summary(amp_model_glm)$coef %>%
as_tibble(rownames="marker") %>%
rename(est=Estimate, pval=`Pr(>|z|)`) %>%
select(marker, est, pval) %>%
mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
filter(marker != "(Intercept)") %>%
mutate(marker=gsub("`","",marker)) %>%
ggplot(aes(y=marker, col=sig)) +
geom_vline(xintercept=0) +
geom_point(aes(x=est)) +
scale_color_manual(values=c("grey", "black")) +
theme_light() + labs(x="Logist regression coefficient", col="p<0.05")
amp_logreg_glm
azi <- solo_marker_atb(ast_matched = ast_matched, antibiotic="azithromycin", refgene_subclass="AZITHROMYCIN", afp_matched=afp_matched)
azi$solo_stats
## # A tibble: 6 × 8
## `Gene symbol` total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 mef(B) 3 resistant 0 0 0 0 0
## 2 mph(A) 8 resistant 7 0.875 0.117 0.646 1
## 3 <NA> 2770 resistant 4 0.00144 0.000722 0.0000299 0.00286
## 4 mef(B) 3 nonsusceptible 0 0 0 0 0
## 5 mph(A) 8 nonsusceptible 7 0.875 0.117 0.646 1
## 6 <NA> 2770 nonsusceptible 4 0.00144 0.000722 0.0000299 0.00286
azi$combined_plot
azi_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "azithromycin", refgene_subclass = "AZITHROMYCIN", maf=1)
azi_model <- logistf(resistant ~ ., data=azi_binary)
azi_logreg <- logreg_details(azi_model) %>%
mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
filter(marker != "(Intercept)") %>%
mutate(marker=gsub("`","",marker)) %>%
ggplot(aes(y=marker, col=sig)) +
geom_vline(xintercept=0) +
geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) +
geom_point(aes(x=est)) +
scale_color_manual(values=c("grey", "black")) +
theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")
azi_logreg
imipenem <- solo_marker_atb(ast_matched = ast_matched, antibiotic="imipenem", refgene_subclass="CARBAPENEM", afp_matched=afp_matched)
imipenem$solo_stats
## # A tibble: 26 × 8
## `Gene symbol` total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 blaKPC-2 4 resistant 4 1 0 1 1
## 2 blaKPC-3 12 resistant 8 0.667 0.136 0.400 0.933
## 3 blaKPC-4 2 resistant 1 0.5 0.354 0 1
## 4 blaNDM-1 10 resistant 9 0.9 0.0949 0.714 1
## 5 blaNDM-5 4 resistant 4 1 0 1 1
## 6 blaNDM-6 1 resistant 1 1 0 1 1
## 7 blaNDM-7 4 resistant 4 1 0 1 1
## 8 blaOXA-181 1 resistant 0 0 0 0 0
## 9 blaOXA-48 1 resistant 1 1 0 1 1
## 10 ompC_Q171STOP 2 resistant 0 0 0 0 0
## # ℹ 16 more rows
imipenem$combined_plot
imp_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "imipenem", refgene_subclass = "CARBAPENEM", maf=5)
imp_model <- logistf(resistant ~ ., data=imp_binary)
imp_logreg <- logreg_details(imp_model) %>%
mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
filter(marker != "(Intercept)") %>%
mutate(marker=gsub("`","",marker)) %>%
ggplot(aes(y=marker, col=sig)) +
geom_vline(xintercept=0) +
geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) +
geom_point(aes(x=est)) +
scale_color_manual(values=c("grey", "black")) +
theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")
imp_logreg
meropenem <- solo_marker_atb(ast_matched = ast_matched, antibiotic="meropenem", refgene_subclass="CARBAPENEM", afp_matched=afp_matched)
meropenem$solo_stats
## # A tibble: 26 × 8
## `Gene symbol` total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 blaKPC-2 4 resistant 3 0.75 0.217 0.326 1
## 2 blaKPC-3 12 resistant 6 0.5 0.144 0.217 0.783
## 3 blaKPC-4 2 resistant 0 0 0 0 0
## 4 blaNDM-1 11 resistant 11 1 0 1 1
## 5 blaNDM-5 8 resistant 8 1 0 1 1
## 6 blaNDM-6 1 resistant 1 1 0 1 1
## 7 blaNDM-7 4 resistant 4 1 0 1 1
## 8 blaOXA-181 1 resistant 0 0 0 0 0
## 9 blaOXA-48 1 resistant 1 1 0 1 1
## 10 ompC_Q171STOP 2 resistant 1 0.5 0.354 0 1
## # ℹ 16 more rows
meropenem$combined_plot
mer_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "meropenem", refgene_subclass = "CARBAPENEM", maf=5)
mer_model <- logistf(resistant ~ ., data=mer_binary)
mer_logreg <- logreg_details(mer_model) %>%
mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
filter(marker != "(Intercept)") %>%
mutate(marker=gsub("`","",marker)) %>%
ggplot(aes(y=marker, col=sig)) +
geom_vline(xintercept=0) +
geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) +
geom_point(aes(x=est)) +
scale_color_manual(values=c("grey", "black")) +
theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")
mer_logreg
ceftazidime_cephalosporin <- solo_marker_atb(ast_matched = ast_matched, antibiotic="ceftazidime", refgene_subclass="CEPHALOSPORIN", afp_matched=afp_matched)
ceftazidime_cephalosporin$solo_stats
## # A tibble: 66 × 8
## `Gene symbol` total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 ampC_C-11T 5 resistant 0 0 0 0 0
## 2 ampC_C-42T 29 resistant 2 0.0690 0.0471 0 0.161
## 3 ampC_G-15GG 2 resistant 0 0 0 0 0
## 4 ampC_T-14TGT 2 resistant 0 0 0 0 0
## 5 ampC_T-32A 58 resistant 1 0.0172 0.0171 0 0.0507
## 6 blaCMY 2 resistant 0 0 0 0 0
## 7 blaCMY-145 1 resistant 1 1 0 1 1
## 8 blaCMY-2 117 resistant 104 0.889 0.0291 0.832 0.946
## 9 blaCMY-4 4 resistant 3 0.75 0.217 0.326 1
## 10 blaCMY-42 5 resistant 5 1 0 1 1
## # ℹ 56 more rows
ceftazidime_cephalosporin$combined_plot
caz_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "ceftazidime", refgene_subclass = "CEPHALOSPORIN")
caz_model <- logistf(resistant ~ ., data=caz_binary)
caz_logreg <- logreg_details(caz_model) %>%
mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
filter(marker != "(Intercept)") %>%
mutate(marker=gsub("`","",marker)) %>%
ggplot(aes(y=marker, col=sig)) +
geom_vline(xintercept=0) +
geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) +
geom_point(aes(x=est)) +
scale_color_manual(values=c("grey", "black")) +
theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")
caz_logreg
chloramphenicol <- solo_marker_atb(ast_matched = ast_matched, antibiotic="chloramphenicol", refgene_class="PHENICOL", afp_matched=afp_matched)
chloramphenicol$solo_stats
## # A tibble: 14 × 8
## `Gene symbol` total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 catA1 17 resistant 17 1 0 1 1
## 2 catA2 1 resistant 1 1 0 1 1
## 3 catB3 0 resistant 0 NaN NaN NaN NaN
## 4 cmlA1 21 resistant 17 0.810 0.0857 0.642 0.977
## 5 cmlA5 0 resistant 0 NaN NaN NaN NaN
## 6 floR 84 resistant 83 0.988 0.0118 0.965 1
## 7 <NA> 2670 resistant 13 0.00487 0.00135 0.00223 0.00751
## 8 catA1 17 nonsuscept… 17 1 0 1 1
## 9 catA2 1 nonsuscept… 1 1 0 1 1
## 10 catB3 0 nonsuscept… 0 NaN NaN NaN NaN
## 11 cmlA1 21 nonsuscept… 18 0.857 0.0764 0.707 1
## 12 cmlA5 0 nonsuscept… 0 NaN NaN NaN NaN
## 13 floR 84 nonsuscept… 83 0.988 0.0118 0.965 1
## 14 <NA> 2670 nonsuscept… 57 0.0213 0.00280 0.0159 0.0268
chloramphenicol$combined_plot
chloramphenicol_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "chloramphenicol", refgene_class = "PHENICOL")
chloramphenicol_model <- logistf(resistant ~ ., data=chloramphenicol_binary)
chloramphenicol_logreg <- logreg_details(chloramphenicol_model) %>%
mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
filter(marker != "(Intercept)") %>%
mutate(marker=gsub("`","",marker)) %>%
ggplot(aes(y=marker, col=sig)) +
geom_vline(xintercept=0) +
geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) +
geom_point(aes(x=est)) +
scale_color_manual(values=c("grey", "black")) +
theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")
chloramphenicol_logreg
tmx <- solo_marker_atb(ast_matched = ast_matched, antibiotic="trimethoprim-sulfamethoxazole", refgene_class=c("TRIMETHOPRIM", "SULFONAMIDE"), afp_matched=afp_matched)
tmx$solo_stats
## # A tibble: 26 × 8
## `Gene symbol` total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 dfrA1 32 resistant 9 0.281 0.0795 0.125 0.437
## 2 dfrA12 5 resistant 0 0 0 0 0
## 3 dfrA14 8 resistant 3 0.375 0.171 0.0395 0.710
## 4 dfrA16 3 resistant 0 0 0 0 0
## 5 dfrA17 18 resistant 11 0.611 0.115 0.386 0.836
## 6 dfrA5 8 resistant 0 0 0 0 0
## 7 dfrA7 14 resistant 2 0.143 0.0935 0 0.326
## 8 dfrA8 3 resistant 2 0.667 0.272 0.133 1
## 9 folP_P64S 1 resistant 0 0 0 0 0
## 10 sul1 220 resistant 0 0 0 0 0
## # ℹ 16 more rows
tmx$combined_plot
tmx_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "trimethoprim-sulfamethoxazole", refgene_class = c("TRIMETHOPRIM", "SULFONAMIDE"))
tmx_model <- logistf(resistant ~ ., data=tmx_binary)
tmx_logreg <- logreg_details(tmx_model) %>%
mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
filter(marker != "(Intercept)") %>%
mutate(marker=gsub("`","",marker)) %>%
ggplot(aes(y=marker, col=sig)) +
geom_vline(xintercept=0) +
geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) +
geom_point(aes(x=est)) +
scale_color_manual(values=c("grey", "black")) +
theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")
tmx_logreg
# solo markers for tetracycline
tet <- solo_marker_atb(ast_matched = ast_matched, antibiotic="tetracycline", refgene_class="TETRACYCLINE", afp_matched=afp_matched)
tet$solo_stats
## # A tibble: 16 × 8
## `Gene symbol` total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 acrB_R620C 3 resistant 0 0 0 0 0
## 2 tet(A) 943 resistant 938 0.995 0.00236 0.990 0.999
## 3 tet(B) 826 resistant 818 0.990 0.00341 0.984 0.997
## 4 tet(C) 47 resistant 15 0.319 0.0680 0.186 0.452
## 5 tet(D) 28 resistant 28 1 0 1 1
## 6 tet(H) 2 resistant 2 1 0 1 1
## 7 tet(M) 6 resistant 0 0 0 0 0
## 8 <NA> 3318 resistant 177 0.0533 0.00390 0.0457 0.0610
## 9 acrB_R620C 3 nonsusceptible 0 0 0 0 0
## 10 tet(A) 943 nonsusceptible 938 0.995 0.00236 0.990 0.999
## 11 tet(B) 826 nonsusceptible 818 0.990 0.00341 0.984 0.997
## 12 tet(C) 47 nonsusceptible 43 0.915 0.0407 0.835 0.995
## 13 tet(D) 28 nonsusceptible 28 1 0 1 1
## 14 tet(H) 2 nonsusceptible 2 1 0 1 1
## 15 tet(M) 6 nonsusceptible 0 0 0 0 0
## 16 <NA> 3318 nonsusceptible 193 0.0582 0.00406 0.0502 0.0661
tet$combined_plot
tet_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "tetracycline", refgene_class = "TETRACYCLINE")
tet_model <- logistf(resistant ~ ., data=tet_binary)
tet_logreg <- logreg_details(tet_model) %>%
mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
filter(marker != "(Intercept)") %>%
mutate(marker=gsub("`","",marker)) %>%
ggplot(aes(y=marker, col=sig)) +
geom_vline(xintercept=0) +
geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) +
geom_point(aes(x=est)) +
scale_color_manual(values=c("grey", "black")) +
theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")
tet_logreg
tig <- solo_marker_atb(ast_matched = ast_matched, antibiotic="tigecycline", refgene_subclass="TIGECYCLINE", afp_matched=afp_matched)
tig$solo_stats
## # A tibble: 2 × 8
## `Gene symbol` total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 <NA> 153 resistant 2 0.0131 0.00918 0 0.0311
## 2 <NA> 153 nonsusceptible 2 0.0131 0.00918 0 0.0311
cip <- solo_marker_atb(ast_matched = ast_matched, antibiotic="ciprofloxacin", refgene_subclass="QUINOLONE", afp_matched=afp_matched)
cip$solo_stats
## # A tibble: 46 × 8
## `Gene symbol` total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 aac(6')-Ib-cr5 1 resistant 0 0 0 0 0
## 2 gyrA_D87G 4 resistant 0 0 0 0 0
## 3 gyrA_D87N 5 resistant 0 0 0 0 0
## 4 gyrA_D87Y 3 resistant 1 0.333 0.272 0 0.867
## 5 gyrA_S83A 12 resistant 0 0 0 0 0
## 6 gyrA_S83L 111 resistant 17 0.153 0.0342 0.0862 0.220
## 7 marR_S3N 524 resistant 4 0.00763 0.00380 0.000181 0.0151
## 8 parC_A56T 7 resistant 0 0 0 0 0
## 9 parC_S57T 40 resistant 0 0 0 0 0
## 10 parE_D475E 91 resistant 0 0 0 0 0
## # ℹ 36 more rows
cip$combined_plot
cip_binary <- getBinMatrix(ast_matched = ast_matched, afp_matched=afp_matched, antibiotic = "ciprofloxacin", refgene_class = "QUINOLONE")
cip_model <- logistf(resistant ~ ., data=cip_binary)
cip_logreg <- logreg_details(cip_model) %>%
mutate(sig=if_else(pval<0.05, TRUE, FALSE)) %>%
filter(marker != "(Intercept)") %>%
mutate(marker=gsub("`","",marker)) %>%
ggplot(aes(y=marker, col=sig)) +
geom_vline(xintercept=0) +
geom_linerange(aes(xmin=ci.lower, xmax=ci.upper)) +
geom_point(aes(x=est)) +
scale_color_manual(values=c("grey", "black")) +
theme_light() + labs(x="Logist regression coefficient (95% CI)", col="p<0.05")
cip_logreg